import requests
from lxml import etree

# HTTP headers sent with every request; only a browser-like user-agent is set.
head = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.26"
    ),
}


def _first_text(node, path, compact=False):
    """Return the first result of XPath *path* under *node* as a string.

    Returns an empty string when the path matches nothing, so a listing
    that lacks one field does not abort the whole page with IndexError.
    With ``compact=True`` all spaces and newlines are removed.
    """
    found = node.xpath(path)
    if not found:
        return ""
    text = str(found[0])
    if compact:
        text = text.replace(" ", "").replace("\n", "")
    return text


def getoldhooms(url, num, out=None, timeout=10):
    """Fetch one listing page and append every listing on it to a text file.

    Args:
        url: Address of the listing page to download.
        num: Number printed in the header of the first listing found on
            this page; it is incremented for each subsequent listing.
        out: Writable text file object. When omitted, the module-level
            file handle ``f`` is used (the original behaviour).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The number the next listing would receive, i.e. ``num`` plus the
        count of listings written.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
    """
    sink = f if out is None else out

    # Without a timeout a stalled server would block the script forever.
    response = requests.get(url, headers=head, timeout=timeout)
    # Do not try to parse an error page as if it were a result page.
    response.raise_for_status()

    html = etree.HTML(response.text)
    if html is None:
        # Nothing parseable came back, so there are no listings to write.
        return num

    listings = html.xpath('/html/body/div[1]/div/div/section/section[3]/section[1]/section[2]/div')
    for web in listings:
        mylist = [
            "基本信息：" + _first_text(web, './a/div[2]/div[1]/div[1]/h3/text()'),
            "规格：" + "".join(web.xpath('./a/div[2]/div[1]/section/div[1]/p[1]/span/text()')),
            "面积：" + _first_text(web, './a/div[2]/div[1]/section/div[1]/p[2]/text()', compact=True),
            "朝向：" + _first_text(web, './a/div[2]/div[1]/section/div[1]/p[3]/text()'),
            "楼层数：" + _first_text(web, './a/div[2]/div[1]/section/div[1]/p[4]/text()', compact=True),
            "建造年份：" + _first_text(web, './a/div[2]/div[1]/section/div[1]/p[5]/text()', compact=True),
            "所在地名：" + _first_text(web, './a/div[2]/div[1]/section/div[2]/p[1]/text()'),
            "地址：" + "-".join(web.xpath('./a/div[2]/div[1]/section/div[2]/p[2]/span/text()')),
            "负责人：" + _first_text(web, './a/div[2]/div[1]/div[2]/div/span[1]/text()'),
            "评分：" + _first_text(web, './a/div[2]/div[1]/div[2]/div/span[2]/text()'),
            "公司：" + _first_text(web, './a/div[2]/div[1]/div[2]/div/span[3]/text()'),
            "总价：" + "".join(web.xpath('./a/div[2]/div[2]/p[1]/span/text()')),
            "单价：" + _first_text(web, './a/div[2]/div[2]/p[2]/text()'),
        ]
        sink.write(f"*****第{num}套二手房*****\n" + "\n".join(mylist))

        # The tag/review line is optional; a blank line separates records either way.
        ping = web.xpath('./a/div[2]/div[1]/section/div[3]/span/text()')
        if ping:
            sink.write("\n" + "评价：" + "-".join(ping) + "\n\n")
        else:
            sink.write("\n\n")
        num += 1

    return num


# Scrape result pages 1-3 and append every listing to the output file.
# The handle must be named ``f``: getoldhooms writes to that module-level name.
output_path = "D:/JetBrains Projects/Pycharm Projects/python-learn/爬虫作业/安居客-二手房/安居客-二手房.txt"
with open(output_path, mode="a", encoding="UTF-8") as f:
    for page in range(1, 4):
        # Listing numbers start at 1, 61, 121: the offset advances by 60 per
        # page (presumably the number of listings per page -- TODO confirm).
        getoldhooms(
            f"https://yongzhou.anjuke.com/sale/p{page}/?from=HomePage_TopBar",
            60 * (page - 1) + 1,
        )
